library(drake)
library(tidyverse)
# Load the `data`, `data_trans` and `data_mice` targets from the drake cache
# into the current session.
loadd(data, data_trans, data_mice)
Count the non-missing (non-NA) values in each column of the raw data
# Number of non-missing entries in every column of `data`.
map_int(data, function(column) sum(!is.na(column)))
## iso3c n_amr_events
## 190 59
## n_amr_first_events health_expend_perc
## 59 183
## migrant_pop_per_capita population
## 190 190
## ab_export_bin english_spoken
## 162 190
## human_consumption_ddd livestock_pcu
## 68 164
## ab_export_per_capita ab_import_per_capita
## 87 161
## livestock_consumption_kg_per_capita gdp_per_capita
## 31 190
## tourism_outbound_per_capita tourism_inbound_per_capita
## 44 111
## pubcrawl_per_capita promed_mentions_per_capita
## 180 189
# Rows where pubcrawl_per_capita is either missing or exactly zero.
nrow(
  filter(data, is.na(pubcrawl_per_capita) | pubcrawl_per_capita == 0)
)
## [1] 33
# Rows where promed_mentions_per_capita is either missing or exactly zero.
nrow(
  filter(
    data,
    is.na(promed_mentions_per_capita) | promed_mentions_per_capita == 0
  )
)
## [1] 1
# Rows where ab_export_per_capita is either missing or exactly zero.
nrow(
  filter(data, is.na(ab_export_per_capita) | ab_export_per_capita == 0)
)
## [1] 103
Count the non-missing (non-NA) values and look at the distributions after NA processing
# Number of non-missing entries in every column of `data_trans`.
map_int(data_trans, function(column) sum(!is.na(column)))
## iso3c n_amr_events
## 190 190
## health_expend_perc ln_migrant_pop_per_capita
## 183 190
## ln_population ab_export_bin
## 190 190
## english_spoken human_consumption_ddd
## 190 68
## ln_livestock_pcu ln_ab_export_per_capita
## 164 190
## ln_ab_import_per_capita ln_livestock_consumption_kg_per_capita
## 161 31
## ln_gdp_per_capita ln_tourism_outbound_per_capita
## 190 44
## ln_tourism_inbound_per_capita ln_pubcrawl_per_capita
## 111 190
## ln_promed_mentions_per_capita
## 190
# Confirm no infinite values: TRUE would flag a column of `data_trans` that
# contains at least one Inf/-Inf.
map_lgl(data_trans, function(column) any(is.infinite(column)))
## iso3c n_amr_events
## FALSE FALSE
## health_expend_perc ln_migrant_pop_per_capita
## FALSE FALSE
## ln_population ab_export_bin
## FALSE FALSE
## english_spoken human_consumption_ddd
## FALSE FALSE
## ln_livestock_pcu ln_ab_export_per_capita
## FALSE FALSE
## ln_ab_import_per_capita ln_livestock_consumption_kg_per_capita
## FALSE FALSE
## ln_gdp_per_capita ln_tourism_outbound_per_capita
## FALSE FALSE
## ln_tourism_inbound_per_capita ln_pubcrawl_per_capita
## FALSE FALSE
## ln_promed_mentions_per_capita
## FALSE
# One histogram per remaining column of `data_trans`, each panel with its own
# axis scales, after dropping iso3c, n_amr_events and english_spoken.
data_trans %>%
  select(-c(iso3c, n_amr_events, english_spoken)) %>%
  gather(key = "key", value = "value") %>% # long format: column name + value
  ggplot(mapping = aes(x = value)) +
  geom_histogram() +
  facet_wrap(~key, scales = "free")
# Pairwise Spearman correlation chart (with histograms on the diagonal) for
# `data_trans`, excluding the columns listed below.
PerformanceAnalytics::chart.Correlation(
  dplyr::select(
    data_trans,
    -c(
      iso3c,
      ln_livestock_pcu,
      ln_ab_import_per_capita,
      ab_export_bin,
      english_spoken
    )
  ),
  histogram = TRUE,
  pch = 19,
  method = "spearman"
)
Look at imputed data
# Trace plots of the imputation streams. On convergence, the different streams
# should be freely intermingled with one another, without showing any definite
# trends. Convergence is diagnosed when the variance between different
# sequences is no larger than the variance within each individual sequence.
plot(data_mice)

# Pass every imputation stored in `data_mice` (its "m" element) together with
# the pre-imputation data to the project helper.
show_imputes(
  data_mice,
  m = data_mice[["m"]],
  raw = data_trans
)
# Extract a completed data set from the imputation object.
# `complete` is namespace-qualified because tidyverse attaches tidyr, whose own
# `complete()` would otherwise be picked up and only works on a mids object if
# mice's S3 method happens to be registered. `action = 1L` spells out the
# default: only the FIRST imputed data set is used below.
imp <- mice::complete(data_mice, action = 1L)

# Pairwise Spearman correlation chart (with histograms on the diagonal) for the
# completed data, excluding the iso3c column.
imp %>%
  dplyr::select(-iso3c) %>%
  PerformanceAnalytics::chart.Correlation(
    .,
    histogram = TRUE,
    pch = 19,
    method = "spearman"
  )